/[pcre]/code/branches/pcre16/pcre_internal.h
ViewVC logotype

Diff of /code/branches/pcre16/pcre_internal.h

Parent Directory | Revision Log | View Patch

revision 483 by ph10, Mon Jan 4 16:19:17 2010 UTC revision 517 by ph10, Wed May 5 10:44:20 2010 UTC
# Line 184  preprocessor time in standard C environm Line 184  preprocessor time in standard C environm
184    #error Cannot determine a type for 32-bit unsigned integers    #error Cannot determine a type for 32-bit unsigned integers
185  #endif  #endif
186    
187  /* When checking for integer overflow in pcre_compile(), we need to handle  /* When checking for integer overflow in pcre_compile(), we need to handle
188  large integers. If a 64-bit integer type is available, we can use that.  large integers. If a 64-bit integer type is available, we can use that.
189  Otherwise we have to cast to double, which of course requires floating point  Otherwise we have to cast to double, which of course requires floating point
190  arithmetic. Handle this by defining a macro for the appropriate type. If  arithmetic. Handle this by defining a macro for the appropriate type. If
191  stdint.h is available, include it; it may define INT64_MAX. The macro int64_t  stdint.h is available, include it; it may define INT64_MAX. Systems that do not
192  may be set by "configure". */  have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set
193    by "configure". */
194    
195  #if HAVE_STDINT_H  #if HAVE_STDINT_H
196  #include <stdint.h>  #include <stdint.h>
197    #elif HAVE_INTTYPES_H
198    #include <inttypes.h>
199  #endif  #endif
200    
201  #if defined INT64_MAX || defined int64_t  #if defined INT64_MAX || defined int64_t
# Line 872  so that PCRE works on both ASCII and EBC Line 875  so that PCRE works on both ASCII and EBC
875  #define STRING_COMMIT0              "COMMIT\0"  #define STRING_COMMIT0              "COMMIT\0"
876  #define STRING_F0                   "F\0"  #define STRING_F0                   "F\0"
877  #define STRING_FAIL0                "FAIL\0"  #define STRING_FAIL0                "FAIL\0"
878    #define STRING_MARK0                "MARK\0"
879  #define STRING_PRUNE0               "PRUNE\0"  #define STRING_PRUNE0               "PRUNE\0"
880  #define STRING_SKIP0                "SKIP\0"  #define STRING_SKIP0                "SKIP\0"
881  #define STRING_THEN                 "THEN"  #define STRING_THEN                 "THEN"
# Line 1124  only. */ Line 1128  only. */
1128  #define STRING_COMMIT0              STR_C STR_O STR_M STR_M STR_I STR_T "\0"  #define STRING_COMMIT0              STR_C STR_O STR_M STR_M STR_I STR_T "\0"
1129  #define STRING_F0                   STR_F "\0"  #define STRING_F0                   STR_F "\0"
1130  #define STRING_FAIL0                STR_F STR_A STR_I STR_L "\0"  #define STRING_FAIL0                STR_F STR_A STR_I STR_L "\0"
1131    #define STRING_MARK0                STR_M STR_A STR_R STR_K "\0"
1132  #define STRING_PRUNE0               STR_P STR_R STR_U STR_N STR_E "\0"  #define STRING_PRUNE0               STR_P STR_R STR_U STR_N STR_E "\0"
1133  #define STRING_SKIP0                STR_S STR_K STR_I STR_P "\0"  #define STRING_SKIP0                STR_S STR_K STR_I STR_P "\0"
1134  #define STRING_THEN                 STR_T STR_H STR_E STR_N  #define STRING_THEN                 STR_T STR_H STR_E STR_N
# Line 1185  only. */ Line 1190  only. */
1190    
1191  #define PT_ANY        0    /* Any property - matches all chars */  #define PT_ANY        0    /* Any property - matches all chars */
1192  #define PT_LAMP       1    /* L& - the union of Lu, Ll, Lt */  #define PT_LAMP       1    /* L& - the union of Lu, Ll, Lt */
1193  #define PT_GC         2    /* General characteristic (e.g. L) */  #define PT_GC         2    /* Specified general characteristic (e.g. L) */
1194  #define PT_PC         3    /* Particular characteristic (e.g. Lu) */  #define PT_PC         3    /* Specified particular characteristic (e.g. Lu) */
1195  #define PT_SC         4    /* Script (e.g. Han) */  #define PT_SC         4    /* Script (e.g. Han) */
1196    #define PT_ALNUM      5    /* Alphanumeric - the union of L and N */
1197    #define PT_SPACE      6    /* Perl space - Z plus 9,10,12,13 */
1198    #define PT_PXSPACE    7    /* POSIX space - Z plus 9,10,11,12,13 */
1199    #define PT_WORD       8    /* Word - L plus N plus underscore */
1200    
1201  /* Flag bits and data types for the extended class (OP_XCLASS) for classes that  /* Flag bits and data types for the extended class (OP_XCLASS) for classes that
1202  contain UTF-8 characters with values greater than 255. */  contain UTF-8 characters with values greater than 255. */
# Line 1204  contain UTF-8 characters with values gre Line 1213  contain UTF-8 characters with values gre
1213  /* These are escaped items that aren't just an encoding of a particular data  /* These are escaped items that aren't just an encoding of a particular data
1214  value such as \n. They must have non-zero values, as check_escape() returns  value such as \n. They must have non-zero values, as check_escape() returns
1215  their negation. Also, they must appear in the same order as in the opcode  their negation. Also, they must appear in the same order as in the opcode
1216  definitions below, up to ESC_z. There's a dummy for OP_ANY because it  definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
1217  corresponds to "." rather than an escape sequence, and another for OP_ALLANY  corresponds to "." in DOTALL mode rather than an escape sequence. It is also
1218  (which is used for [^] in JavaScript compatibility mode).  used for [^] in JavaScript compatibility mode. In non-DOTALL mode, "." behaves
1219    like \N.
1220    
1221  The final escape must be ESC_REF as subsequent values are used for  The final escape must be ESC_REF as subsequent values are used for
1222  backreferences (\1, \2, \3, etc). There are two tests in the code for an escape  backreferences (\1, \2, \3, etc). There are two tests in the code for an escape
# Line 1216  put in between that don't consume a char Line 1226  put in between that don't consume a char
1226  */  */
1227    
1228  enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,  enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
1229         ESC_W, ESC_w, ESC_dum1, ESC_dum2, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,         ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,
1230         ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_g, ESC_k,         ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_g, ESC_k,
1231         ESC_REF };         ESC_REF };
1232    
# Line 1244  enum { Line 1254  enum {
1254    OP_WHITESPACE,         /*  9 \s */    OP_WHITESPACE,         /*  9 \s */
1255    OP_NOT_WORDCHAR,       /* 10 \W */    OP_NOT_WORDCHAR,       /* 10 \W */
1256    OP_WORDCHAR,           /* 11 \w */    OP_WORDCHAR,           /* 11 \w */
1257    OP_ANY,            /* 12 Match any character (subject to DOTALL) */    OP_ANY,            /* 12 Match any character except newline */
1258    OP_ALLANY,         /* 13 Match any character (not subject to DOTALL) */    OP_ALLANY,         /* 13 Match any character */
1259    OP_ANYBYTE,        /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */    OP_ANYBYTE,        /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */
1260    OP_NOTPROP,        /* 15 \P (not Unicode property) */    OP_NOTPROP,        /* 15 \P (not Unicode property) */
1261    OP_PROP,           /* 16 \p (Unicode property) */    OP_PROP,           /* 16 \p (Unicode property) */
# Line 1375  enum { Line 1385  enum {
1385    
1386    /* These are backtracking control verbs */    /* These are backtracking control verbs */
1387    
1388    OP_PRUNE,          /* 107 */    OP_MARK,           /* 107 always has an argument */
1389    OP_SKIP,           /* 108 */    OP_PRUNE,          /* 108 */
1390    OP_THEN,           /* 109 */    OP_PRUNE_ARG,      /* 109 same, but with argument */
1391    OP_COMMIT,         /* 110 */    OP_SKIP,           /* 110 */
1392      OP_SKIP_ARG,       /* 111 same, but with argument */
1393      OP_THEN,           /* 112 */
1394      OP_THEN_ARG,       /* 113 same, but with argument */
1395      OP_COMMIT,         /* 114 */
1396    
1397    /* These are forced failure and success verbs */    /* These are forced failure and success verbs */
1398    
1399    OP_FAIL,           /* 111 */    OP_FAIL,           /* 115 */
1400    OP_ACCEPT,         /* 112 */    OP_ACCEPT,         /* 116 */
1401    OP_CLOSE,          /* 113 Used before OP_ACCEPT to close open captures */    OP_CLOSE,          /* 117 Used before OP_ACCEPT to close open captures */
1402    
1403    /* This is used to skip a subpattern with a {0} quantifier */    /* This is used to skip a subpattern with a {0} quantifier */
1404    
1405    OP_SKIPZERO        /* 114 */    OP_SKIPZERO,       /* 118 */
1406    
1407      /* This is not an opcode, but is used to check that tables indexed by opcode
1408      are the correct length, in order to catch updating errors - there have been
1409      some in the past. */
1410    
1411      OP_TABLE_LENGTH
1412  };  };
1413    
1414  /* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro  /* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
1415  definitions that follow must also be updated to match. There are also tables  definitions that follow must also be updated to match. There are also tables
1416  called "coptable" cna "poptable" in pcre_dfa_exec.c that must be updated. */  called "coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
1417    
1418    
1419  /* This macro defines textual names for all the opcodes. These are used only  /* This macro defines textual names for all the opcodes. These are used only
# Line 1418  for debugging. The macro is referenced o Line 1438  for debugging. The macro is referenced o
1438    "Once", "Bra", "CBra", "Cond", "SBra", "SCBra", "SCond",        \    "Once", "Bra", "CBra", "Cond", "SBra", "SCBra", "SCond",        \
1439    "Cond ref", "Cond nref", "Cond rec", "Cond nrec", "Cond def",   \    "Cond ref", "Cond nref", "Cond rec", "Cond nrec", "Cond def",   \
1440    "Brazero", "Braminzero",                                        \    "Brazero", "Braminzero",                                        \
1441    "*PRUNE", "*SKIP", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT",      \    "*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP",                  \
1442      "*THEN", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT",                \
1443    "Close", "Skip zero"    "Close", "Skip zero"
1444    
1445    
# Line 1436  in UTF-8 mode. The code that uses this t Line 1457  in UTF-8 mode. The code that uses this t
1457    1, 1, 1, 1, 1,                 /* \A, \G, \K, \B, \b                     */ \    1, 1, 1, 1, 1,                 /* \A, \G, \K, \B, \b                     */ \
1458    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */ \    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */ \
1459    1, 1, 1,                       /* Any, AllAny, Anybyte                   */ \    1, 1, 1,                       /* Any, AllAny, Anybyte                   */ \
1460    3, 3, 1,                       /* NOTPROP, PROP, EXTUNI                  */ \    3, 3,                          /* \P, \p                                 */ \
1461    1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */ \    1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */ \
1462      1,                             /* \X                                     */ \
1463    1, 1, 2, 1, 1,                 /* \Z, \z, Opt, ^, $                      */ \    1, 1, 2, 1, 1,                 /* \Z, \z, Opt, ^, $                      */ \
1464    2,                             /* Char  - the minimum length             */ \    2,                             /* Char  - the minimum length             */ \
1465    2,                             /* Charnc  - the minimum length           */ \    2,                             /* Charnc  - the minimum length           */ \
# Line 1483  in UTF-8 mode. The code that uses this t Line 1505  in UTF-8 mode. The code that uses this t
1505    3, 3,                          /* RREF, NRREF                            */ \    3, 3,                          /* RREF, NRREF                            */ \
1506    1,                             /* DEF                                    */ \    1,                             /* DEF                                    */ \
1507    1, 1,                          /* BRAZERO, BRAMINZERO                    */ \    1, 1,                          /* BRAZERO, BRAMINZERO                    */ \
1508    1, 1, 1, 1,                    /* PRUNE, SKIP, THEN, COMMIT,             */ \    3, 1, 3,                       /* MARK, PRUNE, PRUNE_ARG,                */ \
1509    1, 1, 3, 1                     /* FAIL, ACCEPT, CLOSE, SKIPZERO          */    1, 3, 1, 3,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG,        */ \
1510      1, 1, 1, 3, 1                  /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO  */
1511    
1512    
1513  /* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion"  /* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion"
# Line 1492  condition. */ Line 1515  condition. */
1515    
1516  #define RREF_ANY  0xffff  #define RREF_ANY  0xffff
1517    
1518  /* Error code numbers. They are given names so that they can more easily be  /* Compile time error code numbers. They are given names so that they can more
1519  tracked. */  easily be tracked. When a new number is added, the table called eint in
1520    pcreposix.c must be updated. */
1521    
1522  enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,  enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,
1523         ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19,         ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19,
# Line 1501  enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4, Line 1525  enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,
1525         ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,         ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
1526         ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,         ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
1527         ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,         ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
1528         ERR60, ERR61, ERR62, ERR63, ERR64, ERR65 };         ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERRCOUNT };
1529    
1530  /* The real format of the start of the pcre block; the index of names and the  /* The real format of the start of the pcre block; the index of names and the
1531  code vector run on as long as necessary after the end. We store an explicit  code vector run on as long as necessary after the end. We store an explicit
# Line 1551  typedef struct pcre_study_data { Line 1575  typedef struct pcre_study_data {
1575    
1576  /* Structure for building a chain of open capturing subpatterns during  /* Structure for building a chain of open capturing subpatterns during
1577  compiling, so that instructions to close them can be compiled when (*ACCEPT) is  compiling, so that instructions to close them can be compiled when (*ACCEPT) is
1578  encountered. */  encountered. This is also used to identify subpatterns that contain recursive
1579    back references to themselves, so that they can be made atomic. */
1580    
1581  typedef struct open_capitem {  typedef struct open_capitem {
1582    struct open_capitem *next;    /* Chain link */    struct open_capitem *next;    /* Chain link */
1583    pcre_uint16 number;           /* Capture number */    pcre_uint16 number;           /* Capture number */
1584      pcre_uint16 flag;             /* Set TRUE if recursive back ref */
1585  } open_capitem;  } open_capitem;
1586    
1587  /* Structure for passing "static" information around between the functions  /* Structure for passing "static" information around between the functions
# Line 1604  typedef struct recursion_info { Line 1630  typedef struct recursion_info {
1630    struct recursion_info *prevrec; /* Previous recursion record (or NULL) */    struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
1631    int group_num;                /* Number of group that was called */    int group_num;                /* Number of group that was called */
1632    const uschar *after_call;     /* "Return value": points after the call in the expr */    const uschar *after_call;     /* "Return value": points after the call in the expr */
   USPTR save_start;             /* Old value of mstart */  
1633    int *offset_save;             /* Pointer to start of saved offsets */    int *offset_save;             /* Pointer to start of saved offsets */
1634    int saved_max;                /* Number of saved offsets */    int saved_max;                /* Number of saved offsets */
1635    int save_offset_top;          /* Current value of offset_top */    int save_offset_top;          /* Current value of offset_top */
# Line 1662  typedef struct match_data { Line 1687  typedef struct match_data {
1687    int    eptrn;                 /* Next free eptrblock */    int    eptrn;                 /* Next free eptrblock */
1688    recursion_info *recursive;    /* Linked list of recursion data */    recursion_info *recursive;    /* Linked list of recursion data */
1689    void  *callout_data;          /* To pass back to callouts */    void  *callout_data;          /* To pass back to callouts */
1690      const uschar *mark;           /* Mark pointer to pass back */
1691  } match_data;  } match_data;
1692    
1693  /* A similar structure is used for the same purpose by the DFA matching  /* A similar structure is used for the same purpose by the DFA matching

Legend:
Removed from v.483  
changed lines
  Added in v.517

  ViewVC Help
Powered by ViewVC 1.1.5