/[pcre]/code/trunk/pcre_internal.h
ViewVC logotype

Diff of /code/trunk/pcre_internal.h

Parent Directory | Revision Log | View Patch

revision 459 by ph10, Sun Oct 4 09:21:39 2009 UTC revision 510 by ph10, Sat Mar 27 17:45:29 2010 UTC
# Line 7  Line 7 
7  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
8    
9                         Written by Philip Hazel                         Written by Philip Hazel
10             Copyright (c) 1997-2009 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
11    
12  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
13  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 45  functions whose names all begin with "_p Line 45  functions whose names all begin with "_p
45  #ifndef PCRE_INTERNAL_H  #ifndef PCRE_INTERNAL_H
46  #define PCRE_INTERNAL_H  #define PCRE_INTERNAL_H
47    
48  /* Define DEBUG to get debugging output on stdout. */  /* Define PCRE_DEBUG to get debugging output on stdout. */
49    
50  #if 0  #if 0
51  #define DEBUG  #define PCRE_DEBUG
52  #endif  #endif
53    
54  /* We do not support both EBCDIC and UTF-8 at the same time. The "configure"  /* We do not support both EBCDIC and UTF-8 at the same time. The "configure"
# Line 74  It turns out that the Mac Debugging.h he Line 74  It turns out that the Mac Debugging.h he
74  be absolutely sure we get our version. */  be absolutely sure we get our version. */
75    
76  #undef DPRINTF  #undef DPRINTF
77  #ifdef DEBUG  #ifdef PCRE_DEBUG
78  #define DPRINTF(p) printf p  #define DPRINTF(p) printf p
79  #else  #else
80  #define DPRINTF(p) /* Nothing */  #define DPRINTF(p) /* Nothing */
# Line 86  setjmp and stdarg are used is when NO_RE Line 86  setjmp and stdarg are used is when NO_RE
86    
87  #include <ctype.h>  #include <ctype.h>
88  #include <limits.h>  #include <limits.h>
 #include <setjmp.h>  
 #include <stdarg.h>  
89  #include <stddef.h>  #include <stddef.h>
90  #include <stdio.h>  #include <stdio.h>
91  #include <stdlib.h>  #include <stdlib.h>
# Line 186  preprocessor time in standard C environm Line 184  preprocessor time in standard C environm
184    #error Cannot determine a type for 32-bit unsigned integers    #error Cannot determine a type for 32-bit unsigned integers
185  #endif  #endif
186    
187    /* When checking for integer overflow in pcre_compile(), we need to handle
188    large integers. If a 64-bit integer type is available, we can use that.
189    Otherwise we have to cast to double, which of course requires floating point
190    arithmetic. Handle this by defining a macro for the appropriate type. If
191    stdint.h is available, include it; it may define INT64_MAX. Systems that do not
192    have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set
193    by "configure". */
194    
195    #if HAVE_STDINT_H
196    #include <stdint.h>
197    #elif HAVE_INTTYPES_H
198    #include <inttypes.h>
199    #endif
200    
201    #if defined INT64_MAX || defined int64_t
202    #define INT64_OR_DOUBLE int64_t
203    #else
204    #define INT64_OR_DOUBLE double
205    #endif
206    
207  /* All character handling must be done as unsigned characters. Otherwise there  /* All character handling must be done as unsigned characters. Otherwise there
208  are problems with top-bit-set characters and functions such as isspace().  are problems with top-bit-set characters and functions such as isspace().
209  However, we leave the interface to the outside world as char *, because that  However, we leave the interface to the outside world as char *, because that
# Line 857  so that PCRE works on both ASCII and EBC Line 875  so that PCRE works on both ASCII and EBC
875  #define STRING_COMMIT0              "COMMIT\0"  #define STRING_COMMIT0              "COMMIT\0"
876  #define STRING_F0                   "F\0"  #define STRING_F0                   "F\0"
877  #define STRING_FAIL0                "FAIL\0"  #define STRING_FAIL0                "FAIL\0"
878    #define STRING_MARK0                "MARK\0"
879  #define STRING_PRUNE0               "PRUNE\0"  #define STRING_PRUNE0               "PRUNE\0"
880  #define STRING_SKIP0                "SKIP\0"  #define STRING_SKIP0                "SKIP\0"
881  #define STRING_THEN                 "THEN"  #define STRING_THEN                 "THEN"
# Line 1109  only. */ Line 1128  only. */
1128  #define STRING_COMMIT0              STR_C STR_O STR_M STR_M STR_I STR_T "\0"  #define STRING_COMMIT0              STR_C STR_O STR_M STR_M STR_I STR_T "\0"
1129  #define STRING_F0                   STR_F "\0"  #define STRING_F0                   STR_F "\0"
1130  #define STRING_FAIL0                STR_F STR_A STR_I STR_L "\0"  #define STRING_FAIL0                STR_F STR_A STR_I STR_L "\0"
1131    #define STRING_MARK0                STR_M STR_A STR_R STR_K "\0"
1132  #define STRING_PRUNE0               STR_P STR_R STR_U STR_N STR_E "\0"  #define STRING_PRUNE0               STR_P STR_R STR_U STR_N STR_E "\0"
1133  #define STRING_SKIP0                STR_S STR_K STR_I STR_P "\0"  #define STRING_SKIP0                STR_S STR_K STR_I STR_P "\0"
1134  #define STRING_THEN                 STR_T STR_H STR_E STR_N  #define STRING_THEN                 STR_T STR_H STR_E STR_N
# Line 1210  enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, E Line 1230  enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, E
1230  OP_EOD must correspond in order to the list of escapes immediately above.  OP_EOD must correspond in order to the list of escapes immediately above.
1231    
1232  *** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions  *** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions
1233  that follow must also be updated to match. There is also a table called  that follow must also be updated to match. There are also tables called
1234  "coptable" in pcre_dfa_exec.c that must be updated. */  "coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
1235    
1236  enum {  enum {
1237    OP_END,            /* 0 End of pattern */    OP_END,            /* 0 End of pattern */
# Line 1348  enum { Line 1368  enum {
1368    OP_SCOND,          /* 99 Conditional group, check empty */    OP_SCOND,          /* 99 Conditional group, check empty */
1369    
1370    /* The next two pairs must (respectively) be kept together. */    /* The next two pairs must (respectively) be kept together. */
1371    
1372    OP_CREF,           /* 100 Used to hold a capture number as condition */    OP_CREF,           /* 100 Used to hold a capture number as condition */
1373    OP_NCREF,          /* 101 Same, but generated by a name reference*/    OP_NCREF,          /* 101 Same, but generated by a name reference*/
1374    OP_RREF,           /* 102 Used to hold a recursion number as condition */    OP_RREF,           /* 102 Used to hold a recursion number as condition */
# Line 1360  enum { Line 1380  enum {
1380    
1381    /* These are backtracking control verbs */    /* These are backtracking control verbs */
1382    
1383    OP_PRUNE,          /* 107 */    OP_MARK,           /* 107 always has an argument */
1384    OP_SKIP,           /* 108 */    OP_PRUNE,          /* 108 */
1385    OP_THEN,           /* 109 */    OP_PRUNE_ARG,      /* 109 same, but with argument */
1386    OP_COMMIT,         /* 110 */    OP_SKIP,           /* 110 */
1387      OP_SKIP_ARG,       /* 111 same, but with argument */
1388      OP_THEN,           /* 112 */
1389      OP_THEN_ARG,       /* 113 same, but with argument */
1390      OP_COMMIT,         /* 114 */
1391    
1392    /* These are forced failure and success verbs */    /* These are forced failure and success verbs */
1393    
1394    OP_FAIL,           /* 111 */    OP_FAIL,           /* 115 */
1395    OP_ACCEPT,         /* 112 */    OP_ACCEPT,         /* 116 */
1396    OP_CLOSE,          /* 113 Used before OP_ACCEPT to close open captures */    OP_CLOSE,          /* 117 Used before OP_ACCEPT to close open captures */
1397    
1398    /* This is used to skip a subpattern with a {0} quantifier */    /* This is used to skip a subpattern with a {0} quantifier */
1399    
1400    OP_SKIPZERO        /* 114 */    OP_SKIPZERO,       /* 118 */
1401    
1402      /* This is not an opcode, but is used to check that tables indexed by opcode
1403      are the correct length, in order to catch updating errors - there have been
1404      some in the past. */
1405    
1406      OP_TABLE_LENGTH
1407  };  };
1408    
1409    /* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
1410    definitions that follow must also be updated to match. There are also tables
1411    called "coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
1412    
1413    
1414  /* This macro defines textual names for all the opcodes. These are used only  /* This macro defines textual names for all the opcodes. These are used only
1415  for debugging. The macro is referenced only in pcre_printint.c. */  for debugging. The macro is referenced only in pcre_printint.c. */
# Line 1399  for debugging. The macro is referenced o Line 1433  for debugging. The macro is referenced o
1433    "Once", "Bra", "CBra", "Cond", "SBra", "SCBra", "SCond",        \    "Once", "Bra", "CBra", "Cond", "SBra", "SCBra", "SCond",        \
1434    "Cond ref", "Cond nref", "Cond rec", "Cond nrec", "Cond def",   \    "Cond ref", "Cond nref", "Cond rec", "Cond nrec", "Cond def",   \
1435    "Brazero", "Braminzero",                                        \    "Brazero", "Braminzero",                                        \
1436    "*PRUNE", "*SKIP", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT",      \    "*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP",                  \
1437      "*THEN", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT",                \
1438    "Close", "Skip zero"    "Close", "Skip zero"
1439    
1440    
# Line 1417  in UTF-8 mode. The code that uses this t Line 1452  in UTF-8 mode. The code that uses this t
1452    1, 1, 1, 1, 1,                 /* \A, \G, \K, \B, \b                     */ \    1, 1, 1, 1, 1,                 /* \A, \G, \K, \B, \b                     */ \
1453    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */ \    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */ \
1454    1, 1, 1,                       /* Any, AllAny, Anybyte                   */ \    1, 1, 1,                       /* Any, AllAny, Anybyte                   */ \
1455    3, 3, 1,                       /* NOTPROP, PROP, EXTUNI                  */ \    3, 3,                          /* \P, \p                                 */ \
1456    1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */ \    1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */ \
1457      1,                             /* \X                                     */ \
1458    1, 1, 2, 1, 1,                 /* \Z, \z, Opt, ^, $                      */ \    1, 1, 2, 1, 1,                 /* \Z, \z, Opt, ^, $                      */ \
1459    2,                             /* Char  - the minimum length             */ \    2,                             /* Char  - the minimum length             */ \
1460    2,                             /* Charnc  - the minimum length           */ \    2,                             /* Charnc  - the minimum length           */ \
# Line 1464  in UTF-8 mode. The code that uses this t Line 1500  in UTF-8 mode. The code that uses this t
1500    3, 3,                          /* RREF, NRREF                            */ \    3, 3,                          /* RREF, NRREF                            */ \
1501    1,                             /* DEF                                    */ \    1,                             /* DEF                                    */ \
1502    1, 1,                          /* BRAZERO, BRAMINZERO                    */ \    1, 1,                          /* BRAZERO, BRAMINZERO                    */ \
1503    1, 1, 1, 1,                    /* PRUNE, SKIP, THEN, COMMIT,             */ \    3, 1, 3,                       /* MARK, PRUNE, PRUNE_ARG,                */ \
1504    1, 1, 3, 1                     /* FAIL, ACCEPT, CLOSE, SKIPZERO          */    1, 3, 1, 3,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG,        */ \
1505      1, 1, 1, 3, 1                  /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO  */
1506    
1507    
1508  /* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion"  /* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion"
# Line 1473  condition. */ Line 1510  condition. */
1510    
1511  #define RREF_ANY  0xffff  #define RREF_ANY  0xffff
1512    
1513  /* Error code numbers. They are given names so that they can more easily be  /* Compile time error code numbers. They are given names so that they can more
1514  tracked. */  easily be tracked. When a new number is added, the table called eint in
1515    pcreposix.c must be updated. */
1516    
1517  enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,  enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,
1518         ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19,         ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19,
# Line 1482  enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4, Line 1520  enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,
1520         ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,         ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
1521         ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,         ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
1522         ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,         ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
1523         ERR60, ERR61, ERR62, ERR63, ERR64, ERR65 };         ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERRCOUNT };
1524    
1525  /* The real format of the start of the pcre block; the index of names and the  /* The real format of the start of the pcre block; the index of names and the
1526  code vector run on as long as necessary after the end. We store an explicit  code vector run on as long as necessary after the end. We store an explicit
# Line 1532  typedef struct pcre_study_data { Line 1570  typedef struct pcre_study_data {
1570    
1571  /* Structure for building a chain of open capturing subpatterns during  /* Structure for building a chain of open capturing subpatterns during
1572  compiling, so that instructions to close them can be compiled when (*ACCEPT) is  compiling, so that instructions to close them can be compiled when (*ACCEPT) is
1573  encountered. */  encountered. This is also used to identify subpatterns that contain recursive
1574    back references to themselves, so that they can be made atomic. */
1575    
1576  typedef struct open_capitem {  typedef struct open_capitem {
1577    struct open_capitem *next;    /* Chain link */    struct open_capitem *next;    /* Chain link */
1578    pcre_uint16 number;           /* Capture number */    pcre_uint16 number;           /* Capture number */
1579      pcre_uint16 flag;             /* Set TRUE if recursive back ref */
1580  } open_capitem;  } open_capitem;
1581    
1582  /* Structure for passing "static" information around between the functions  /* Structure for passing "static" information around between the functions
# Line 1575  branches, for testing for left recursion Line 1615  branches, for testing for left recursion
1615    
1616  typedef struct branch_chain {  typedef struct branch_chain {
1617    struct branch_chain *outer;    struct branch_chain *outer;
1618    uschar *current;    uschar *current_branch;
1619  } branch_chain;  } branch_chain;
1620    
1621  /* Structure for items in a linked list that represents an explicit recursive  /* Structure for items in a linked list that represents an explicit recursive
# Line 1585  typedef struct recursion_info { Line 1625  typedef struct recursion_info {
1625    struct recursion_info *prevrec; /* Previous recursion record (or NULL) */    struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
1626    int group_num;                /* Number of group that was called */    int group_num;                /* Number of group that was called */
1627    const uschar *after_call;     /* "Return value": points after the call in the expr */    const uschar *after_call;     /* "Return value": points after the call in the expr */
   USPTR save_start;             /* Old value of mstart */  
1628    int *offset_save;             /* Pointer to start of saved offsets */    int *offset_save;             /* Pointer to start of saved offsets */
1629    int saved_max;                /* Number of saved offsets */    int saved_max;                /* Number of saved offsets */
1630    int offset_top;               /* Current value of offset_top */    int save_offset_top;          /* Current value of offset_top */
1631  } recursion_info;  } recursion_info;
1632    
1633  /* Structure for building a chain of data for holding the values of the subject  /* Structure for building a chain of data for holding the values of the subject
# Line 1615  typedef struct match_data { Line 1654  typedef struct match_data {
1654    int    nllen;                 /* Newline string length */    int    nllen;                 /* Newline string length */
1655    int    name_count;            /* Number of names in name table */    int    name_count;            /* Number of names in name table */
1656    int    name_entry_size;       /* Size of entry in names table */    int    name_entry_size;       /* Size of entry in names table */
1657    uschar *name_table;           /* Table of names */    uschar *name_table;           /* Table of names */
1658    uschar nl[4];                 /* Newline string when fixed */    uschar nl[4];                 /* Newline string when fixed */
1659    const uschar *lcc;            /* Points to lower casing table */    const uschar *lcc;            /* Points to lower casing table */
1660    const uschar *ctypes;         /* Points to table of type maps */    const uschar *ctypes;         /* Points to table of type maps */
# Line 1643  typedef struct match_data { Line 1682  typedef struct match_data {
1682    int    eptrn;                 /* Next free eptrblock */    int    eptrn;                 /* Next free eptrblock */
1683    recursion_info *recursive;    /* Linked list of recursion data */    recursion_info *recursive;    /* Linked list of recursion data */
1684    void  *callout_data;          /* To pass back to callouts */    void  *callout_data;          /* To pass back to callouts */
1685      const uschar *mark;           /* Mark pointer to pass back */
1686  } match_data;  } match_data;
1687    
1688  /* A similar structure is used for the same purpose by the DFA matching  /* A similar structure is used for the same purpose by the DFA matching
# Line 1734  one of the exported public functions. Th Line 1774  one of the exported public functions. Th
1774  sense, but are not part of the PCRE public API. */  sense, but are not part of the PCRE public API. */
1775    
1776  extern const uschar *_pcre_find_bracket(const uschar *, BOOL, int);  extern const uschar *_pcre_find_bracket(const uschar *, BOOL, int);
1777  extern BOOL          _pcre_is_newline(const uschar *, int, const uschar *,  extern BOOL          _pcre_is_newline(USPTR, int, USPTR, int *, BOOL);
                        int *, BOOL);  
1778  extern int           _pcre_ord2utf8(int, uschar *);  extern int           _pcre_ord2utf8(int, uschar *);
1779  extern real_pcre    *_pcre_try_flipped(const real_pcre *, real_pcre *,  extern real_pcre    *_pcre_try_flipped(const real_pcre *, real_pcre *,
1780                         const pcre_study_data *, pcre_study_data *);                         const pcre_study_data *, pcre_study_data *);
1781  extern int           _pcre_valid_utf8(const uschar *, int);  extern int           _pcre_valid_utf8(USPTR, int);
1782  extern BOOL          _pcre_was_newline(const uschar *, int, const uschar *,  extern BOOL          _pcre_was_newline(USPTR, int, USPTR, int *, BOOL);
                        int *, BOOL);  
1783  extern BOOL          _pcre_xclass(int, const uschar *);  extern BOOL          _pcre_xclass(int, const uschar *);
1784    
1785    

Legend:
Removed from v.459  
changed lines
  Added in v.510

  ViewVC Help
Powered by ViewVC 1.1.5