/[pcre]/code/trunk/pcre_internal.h
ViewVC logotype

Diff of /code/trunk/pcre_internal.h

Parent Directory | Revision Log | View Patch

revision 507 by ph10, Wed Mar 10 16:08:01 2010 UTC | revision 518 by ph10, Tue May 18 15:47:01 2010 UTC
# Line 580  time, run time, or study time, respectiv Line 580  time, run time, or study time, respectiv
580     PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \     PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
581     PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \     PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
582     PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \     PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
583     PCRE_JAVASCRIPT_COMPAT)     PCRE_JAVASCRIPT_COMPAT|PCRE_UCP)
584    
585  #define PUBLIC_EXEC_OPTIONS \  #define PUBLIC_EXEC_OPTIONS \
586    (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \    (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \
# Line 875  so that PCRE works on both ASCII and EBC Line 875  so that PCRE works on both ASCII and EBC
875  #define STRING_COMMIT0              "COMMIT\0"  #define STRING_COMMIT0              "COMMIT\0"
876  #define STRING_F0                   "F\0"  #define STRING_F0                   "F\0"
877  #define STRING_FAIL0                "FAIL\0"  #define STRING_FAIL0                "FAIL\0"
878    #define STRING_MARK0                "MARK\0"
879  #define STRING_PRUNE0               "PRUNE\0"  #define STRING_PRUNE0               "PRUNE\0"
880  #define STRING_SKIP0                "SKIP\0"  #define STRING_SKIP0                "SKIP\0"
881  #define STRING_THEN                 "THEN"  #define STRING_THEN                 "THEN"
# Line 904  so that PCRE works on both ASCII and EBC Line 905  so that PCRE works on both ASCII and EBC
905  #define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"  #define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
906  #define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"  #define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
907  #define STRING_UTF8_RIGHTPAR        "UTF8)"  #define STRING_UTF8_RIGHTPAR        "UTF8)"
908    #define STRING_UCP_RIGHTPAR         "UCP)"
909    
910  #else  /* SUPPORT_UTF8 */  #else  /* SUPPORT_UTF8 */
911    
# Line 1127  only. */ Line 1129  only. */
1129  #define STRING_COMMIT0              STR_C STR_O STR_M STR_M STR_I STR_T "\0"  #define STRING_COMMIT0              STR_C STR_O STR_M STR_M STR_I STR_T "\0"
1130  #define STRING_F0                   STR_F "\0"  #define STRING_F0                   STR_F "\0"
1131  #define STRING_FAIL0                STR_F STR_A STR_I STR_L "\0"  #define STRING_FAIL0                STR_F STR_A STR_I STR_L "\0"
1132    #define STRING_MARK0                STR_M STR_A STR_R STR_K "\0"
1133  #define STRING_PRUNE0               STR_P STR_R STR_U STR_N STR_E "\0"  #define STRING_PRUNE0               STR_P STR_R STR_U STR_N STR_E "\0"
1134  #define STRING_SKIP0                STR_S STR_K STR_I STR_P "\0"  #define STRING_SKIP0                STR_S STR_K STR_I STR_P "\0"
1135  #define STRING_THEN                 STR_T STR_H STR_E STR_N  #define STRING_THEN                 STR_T STR_H STR_E STR_N
# Line 1156  only. */ Line 1159  only. */
1159  #define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS  #define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
1160  #define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS  #define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
1161  #define STRING_UTF8_RIGHTPAR        STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS  #define STRING_UTF8_RIGHTPAR        STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
1162    #define STRING_UCP_RIGHTPAR         STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
1163    
1164  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
1165    
# Line 1188  only. */ Line 1192  only. */
1192    
1193  #define PT_ANY        0    /* Any property - matches all chars */  #define PT_ANY        0    /* Any property - matches all chars */
1194  #define PT_LAMP       1    /* L& - the union of Lu, Ll, Lt */  #define PT_LAMP       1    /* L& - the union of Lu, Ll, Lt */
1195  #define PT_GC         2    /* General characteristic (e.g. L) */  #define PT_GC         2    /* Specified general characteristic (e.g. L) */
1196  #define PT_PC         3    /* Particular characteristic (e.g. Lu) */  #define PT_PC         3    /* Specified particular characteristic (e.g. Lu) */
1197  #define PT_SC         4    /* Script (e.g. Han) */  #define PT_SC         4    /* Script (e.g. Han) */
1198    #define PT_ALNUM      5    /* Alphanumeric - the union of L and N */
1199    #define PT_SPACE      6    /* Perl space - Z plus 9,10,12,13 */
1200    #define PT_PXSPACE    7    /* POSIX space - Z plus 9,10,11,12,13 */
1201    #define PT_WORD       8    /* Word - L plus N plus underscore */
1202    
1203  /* Flag bits and data types for the extended class (OP_XCLASS) for classes that  /* Flag bits and data types for the extended class (OP_XCLASS) for classes that
1204  contain UTF-8 characters with values greater than 255. */  contain UTF-8 characters with values greater than 255. */
# Line 1207  contain UTF-8 characters with values gre Line 1215  contain UTF-8 characters with values gre
1215  /* These are escaped items that aren't just an encoding of a particular data  /* These are escaped items that aren't just an encoding of a particular data
1216  value such as \n. They must have non-zero values, as check_escape() returns  value such as \n. They must have non-zero values, as check_escape() returns
1217  their negation. Also, they must appear in the same order as in the opcode  their negation. Also, they must appear in the same order as in the opcode
1218  definitions below, up to ESC_z. There's a dummy for OP_ANY because it  definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
1219  corresponds to "." rather than an escape sequence, and another for OP_ALLANY  corresponds to "." in DOTALL mode rather than an escape sequence. It is also
1220  (which is used for [^] in JavaScript compatibility mode).  used for [^] in JavaScript compatibility mode. In non-DOTALL mode, "." behaves
1221    like \N.
1222    
1223    The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc.
1224    when PCRE_UCP is set, when replacement of \d etc by \p sequences is required.
1225    They must be contiguous, and remain in order so that the replacements can be
1226    looked up from a table.
1227    
1228  The final escape must be ESC_REF as subsequent values are used for  The final escape must be ESC_REF as subsequent values are used for
1229  backreferences (\1, \2, \3, etc). There are two tests in the code for an escape  backreferences (\1, \2, \3, etc). There are two tests in the code for an escape
# Line 1219  put in between that don't consume a char Line 1233  put in between that don't consume a char
1233  */  */
1234    
1235  enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,  enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
1236         ESC_W, ESC_w, ESC_dum1, ESC_dum2, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,         ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,
1237         ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_g, ESC_k,         ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z,
1238           ESC_E, ESC_Q, ESC_g, ESC_k,
1239           ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu,
1240         ESC_REF };         ESC_REF };
1241    
   
1242  /* Opcode table: Starting from 1 (i.e. after OP_END), the values up to  /* Opcode table: Starting from 1 (i.e. after OP_END), the values up to
1243  OP_EOD must correspond in order to the list of escapes immediately above.  OP_EOD must correspond in order to the list of escapes immediately above.
1244    
# Line 1247  enum { Line 1262  enum {
1262    OP_WHITESPACE,         /*  9 \s */    OP_WHITESPACE,         /*  9 \s */
1263    OP_NOT_WORDCHAR,       /* 10 \W */    OP_NOT_WORDCHAR,       /* 10 \W */
1264    OP_WORDCHAR,           /* 11 \w */    OP_WORDCHAR,           /* 11 \w */
1265    OP_ANY,            /* 12 Match any character (subject to DOTALL) */    OP_ANY,            /* 12 Match any character except newline */
1266    OP_ALLANY,         /* 13 Match any character (not subject to DOTALL) */    OP_ALLANY,         /* 13 Match any character */
1267    OP_ANYBYTE,        /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */    OP_ANYBYTE,        /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */
1268    OP_NOTPROP,        /* 15 \P (not Unicode property) */    OP_NOTPROP,        /* 15 \P (not Unicode property) */
1269    OP_PROP,           /* 16 \p (Unicode property) */    OP_PROP,           /* 16 \p (Unicode property) */
# Line 1378  enum { Line 1393  enum {
1393    
1394    /* These are backtracking control verbs */    /* These are backtracking control verbs */
1395    
1396    OP_PRUNE,          /* 107 */    OP_MARK,           /* 107 always has an argument */
1397    OP_SKIP,           /* 108 */    OP_PRUNE,          /* 108 */
1398    OP_THEN,           /* 109 */    OP_PRUNE_ARG,      /* 109 same, but with argument */
1399    OP_COMMIT,         /* 110 */    OP_SKIP,           /* 110 */
1400      OP_SKIP_ARG,       /* 111 same, but with argument */
1401      OP_THEN,           /* 112 */
1402      OP_THEN_ARG,       /* 113 same, but with argument */
1403      OP_COMMIT,         /* 114 */
1404    
1405    /* These are forced failure and success verbs */    /* These are forced failure and success verbs */
1406    
1407    OP_FAIL,           /* 111 */    OP_FAIL,           /* 115 */
1408    OP_ACCEPT,         /* 112 */    OP_ACCEPT,         /* 116 */
1409    OP_CLOSE,          /* 113 Used before OP_ACCEPT to close open captures */    OP_CLOSE,          /* 117 Used before OP_ACCEPT to close open captures */
1410    
1411    /* This is used to skip a subpattern with a {0} quantifier */    /* This is used to skip a subpattern with a {0} quantifier */
1412    
1413    OP_SKIPZERO,       /* 114 */    OP_SKIPZERO,       /* 118 */
1414    
1415    /* This is not an opcode, but is used to check that tables indexed by opcode    /* This is not an opcode, but is used to check that tables indexed by opcode
1416    are the correct length, in order to catch updating errors - there have been    are the correct length, in order to catch updating errors - there have been
# Line 1402  enum { Line 1421  enum {
1421    
1422  /* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro  /* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
1423  definitions that follow must also be updated to match. There are also tables  definitions that follow must also be updated to match. There are also tables
1424  called "coptable" cna "poptable" in pcre_dfa_exec.c that must be updated. */  called "coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
1425    
1426    
1427  /* This macro defines textual names for all the opcodes. These are used only  /* This macro defines textual names for all the opcodes. These are used only
# Line 1427  for debugging. The macro is referenced o Line 1446  for debugging. The macro is referenced o
1446    "Once", "Bra", "CBra", "Cond", "SBra", "SCBra", "SCond",        \    "Once", "Bra", "CBra", "Cond", "SBra", "SCBra", "SCond",        \
1447    "Cond ref", "Cond nref", "Cond rec", "Cond nrec", "Cond def",   \    "Cond ref", "Cond nref", "Cond rec", "Cond nrec", "Cond def",   \
1448    "Brazero", "Braminzero",                                        \    "Brazero", "Braminzero",                                        \
1449    "*PRUNE", "*SKIP", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT",      \    "*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP",                  \
1450      "*THEN", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT",                \
1451    "Close", "Skip zero"    "Close", "Skip zero"
1452    
1453    
# Line 1493  in UTF-8 mode. The code that uses this t Line 1513  in UTF-8 mode. The code that uses this t
1513    3, 3,                          /* RREF, NRREF                            */ \    3, 3,                          /* RREF, NRREF                            */ \
1514    1,                             /* DEF                                    */ \    1,                             /* DEF                                    */ \
1515    1, 1,                          /* BRAZERO, BRAMINZERO                    */ \    1, 1,                          /* BRAZERO, BRAMINZERO                    */ \
1516    1, 1, 1, 1,                    /* PRUNE, SKIP, THEN, COMMIT,             */ \    3, 1, 3,                       /* MARK, PRUNE, PRUNE_ARG,                */ \
1517    1, 1, 3, 1                     /* FAIL, ACCEPT, CLOSE, SKIPZERO          */    1, 3, 1, 3,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG,        */ \
1518      1, 1, 1, 3, 1                  /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO  */
1519    
1520    
1521  /* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion"  /* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion"
# Line 1512  enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4, Line 1533  enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,
1533         ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,         ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
1534         ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,         ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
1535         ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,         ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
1536         ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERRCOUNT };         ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERRCOUNT };
1537    
1538  /* The real format of the start of the pcre block; the index of names and the  /* The real format of the start of the pcre block; the index of names and the
1539  code vector run on as long as necessary after the end. We store an explicit  code vector run on as long as necessary after the end. We store an explicit
# Line 1655  typedef struct match_data { Line 1676  typedef struct match_data {
1676    BOOL   noteol;                /* NOTEOL flag */    BOOL   noteol;                /* NOTEOL flag */
1677    BOOL   utf8;                  /* UTF8 flag */    BOOL   utf8;                  /* UTF8 flag */
1678    BOOL   jscript_compat;        /* JAVASCRIPT_COMPAT flag */    BOOL   jscript_compat;        /* JAVASCRIPT_COMPAT flag */
1679      BOOL   use_ucp;               /* PCRE_UCP flag */
1680    BOOL   endonly;               /* Dollar not before final \n */    BOOL   endonly;               /* Dollar not before final \n */
1681    BOOL   notempty;              /* Empty string match not wanted */    BOOL   notempty;              /* Empty string match not wanted */
1682    BOOL   notempty_atstart;      /* Empty string match at start not wanted */    BOOL   notempty_atstart;      /* Empty string match at start not wanted */
# Line 1674  typedef struct match_data { Line 1696  typedef struct match_data {
1696    int    eptrn;                 /* Next free eptrblock */    int    eptrn;                 /* Next free eptrblock */
1697    recursion_info *recursive;    /* Linked list of recursion data */    recursion_info *recursive;    /* Linked list of recursion data */
1698    void  *callout_data;          /* To pass back to callouts */    void  *callout_data;          /* To pass back to callouts */
1699      const uschar *mark;           /* Mark pointer to pass back */
1700  } match_data;  } match_data;
1701    
1702  /* A similar structure is used for the same purpose by the DFA matching  /* A similar structure is used for the same purpose by the DFA matching

Legend:
Removed from v.507  
changed lines
  Added in v.518

  ViewVC Help
Powered by ViewVC 1.1.5